Preparation


In [1]:
# Execute the Preparation notebook in this kernel's namespace.
# Later cells use `pd`, `np` and `plt` without importing them, so those names
# are presumably expected to come from Preparation.ipynb — TODO confirm.
# NOTE(review): the saved output of this cell shows %run failing because it
# looked for 'Preparation.ipynb.py'; every following cell then fails on `pd`.
%run Preparation.ipynb


ERROR: File `'Preparation.ipynb.py'` not found.

In [2]:
# Columns the rest of the analysis works with.
relevantColumns = ['id', 'playerId', 'serverTime', 'customData.localplayerguid']

# Full export for version 1.52, followed by its projection onto those columns.
rmdf152 = pd.read_csv("data/1.52.csv")
part152 = rmdf152.loc[:, relevantColumns]

# `df` is the working frame used by the cells below.
df = part152


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-cf9d689c5a5a> in <module>()
----> 1 rmdf152 = pd.read_csv("data/1.52.csv")
      2 
      3 relevantColumns = ['id', 'playerId', 'serverTime', 'customData.localplayerguid']
      4 
      5 part152 = rmdf152.loc[:,relevantColumns]

NameError: name 'pd' is not defined

Filter by date range


In [ ]:
# Bounds of the time window to keep; both ends are exclusive.
startTime = "2017-05-13T09:30:00.000Z"
endTime = "2017-05-13T17:10:00.000Z"

# 'serverTime' is compared against the bounds exactly as stored in the frame.
df = df.loc[df['serverTime'].gt(startTime) & df['serverTime'].lt(endTime)]

Unique Game Sessions

Number of unique game sessions


In [ ]:
# Count the distinct 'playerId' values present in the working frame.
playerIds = df.loc[:, "playerId"]
playerscount = playerIds.nunique()
playerscount

Number of unique device IDs


In [ ]:
# Non-null device GUIDs found in the working frame, as a Series named 'guid'.
localPlayerGuids = df['customData.localplayerguid'].dropna().rename('guid')

# Only the last expression of a cell is rendered, so the count is printed
# explicitly; as a bare expression on an earlier line it would be discarded.
print(localPlayerGuids.nunique())

# The distinct GUID values themselves.
localPlayerGuids.unique()

Duration

Duration of playing sessions


In [ ]:
# Earliest and latest 'serverTime' per 'playerId'.
# The reducers are given by name: when np.min / np.max are passed instead, the
# resulting column labels are derived from the callables and differ between
# numpy/pandas versions ("amin"/"amax" vs "min"/"max"), which breaks the
# label-based lookups below.
durations = df.groupby("playerId").agg({"serverTime": ["min", "max"]})

# Pin the labels to the "amin"/"amax" spelling this notebook has always used.
durations.columns = pd.MultiIndex.from_tuples(
    [("serverTime", "amin"), ("serverTime", "amax")]
)

# Span between the first and the last event of each group.
durations["duration"] = (
    pd.to_datetime(durations[("serverTime", "amax")])
    - pd.to_datetime(durations[("serverTime", "amin")])
)
# Re-express every span as a numpy timedelta64 in second units.
durations["duration"] = durations["duration"].map(lambda x: np.timedelta64(x, 's'))

# Longest spans first.
durations = durations.sort_values(by=['duration'], ascending=[False])
durations.head()

Duration plot


In [ ]:
# Drop the first four rows of `durations` before plotting.
# NOTE(review): presumably these are outliers, relying on the frame being
# sorted longest-first by the previous cell — TODO confirm. Re-running this
# cell drops four more rows each time because `durations` is reassigned.
# `.copy()` makes the column assignments below operate on an independent frame
# rather than on a slice of the original.
durations = durations.iloc[4:].copy()

# Total length in seconds. `.seconds` would only return the seconds component
# of the timedelta and wrap around for spans of a day or more.
durations["duration_seconds"] = durations["duration"].dt.total_seconds().astype(int)
maxDuration = durations["duration_seconds"].max()

# Rank 1 is the longest remaining duration.
durations["duration_rank"] = durations["duration_seconds"].rank(ascending=False)

# The legend is switched off directly instead of drawing an empty one.
ax = durations.plot(x="duration_rank", y="duration_seconds", legend=False)
ax.set_xlabel("game session")
# The plotted values are seconds, so the axis is labelled accordingly.
ax.set_ylabel("time played (s)")
ax.set_xlim(0, playerscount)
ax.set_ylim(0, maxDuration)

# Only the last expression of a cell is rendered, so the summary is printed.
print(durations["duration_seconds"].describe())
durations.head()